{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "import time\n",
    "import pandas as pd\n",
    "sys.path.insert(0, os.path.realpath('..'))\n",
    "import django\n",
    "django.setup()\n",
    "from django.db import connections\n",
    "pd.set_option('display.max_rows', 3000)\n",
    "from IPython.display import display\n",
    "from firecares.firestation.models import FireDepartment\n",
    "import editdistance\n",
    "from IPython.lib.pretty import pprint\n",
    "from firecares.firestation.templatetags import firecares_tags"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv('./2015_AFG_FDID.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fd_names = FireDepartment.objects.values_list('name', 'state')\n",
    "\n",
    "fd_mappings = {\n",
    "    'NC': {\n",
    "        '16045': '01645'\n",
    "    },\n",
    "    'PA': {\n",
    "        '54064': '54564'\n",
    "    }\n",
    "}\n",
    "\n",
    "def massage_fdid(fdid, state):\n",
    "    import re\n",
    "    if state in fd_mappings and fdid in fd_mappings[state]:\n",
    "        return fd_mappings[state][fdid]\n",
    "    stripped = re.sub('-|\\s+', '', fdid)\n",
    "    if len(stripped) < 5 and stripped:\n",
    "        return fdid.zfill(5)\n",
    "    elif len(stripped) > 5 and stripped.lower().startswith(state.lower()):\n",
    "        return re.sub('^{}'.format(state), '', stripped)\n",
    "    else:\n",
    "        return stripped\n",
    "    \n",
    "def fuzzy_find_department(name, fdid, state):\n",
    "    massaged_fdid = massage_fdid(fdid, state)\n",
    "    if massaged_fdid:\n",
    "        fds = FireDepartment.objects.filter(fdid__iexact=massaged_fdid, state__iexact=state)\n",
    "        if fds.exists():\n",
    "            if len(fds) > 1:\n",
    "                raise Exception('Multiple matches for {} - {} - {}'.format(name, fdid, state))\n",
    "            return fds.first(), None\n",
    "        else:\n",
    "            fds = FireDepartment.objects.filter(state__iexact=state, name__iexact=name)\n",
    "            if fds.exists():\n",
    "                return fds.first(), None\n",
    "            potentials = filter(lambda x: x[1] == state, fd_names)\n",
    "            matches = sorted((editdistance.eval(name, x[0]), x[0], name, fdid, state) for x in potentials)\n",
    "            return None, matches[:5]\n",
    "    else:\n",
    "        return None, None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "hits = []\n",
    "misses = []\n",
    "records = df.to_dict(orient='index')\n",
    "for index, data in records.items():\n",
    "    state = data.get('STATE')\n",
    "    fdid = data.get('FDID')\n",
    "    name = data.get('Name')\n",
    "    dept, options = fuzzy_find_department(name, fdid, state)\n",
    "    if dept:\n",
    "        print 'Hit on {}, row {}'.format(dept, index)\n",
    "        hits.append((index, dept))\n",
    "    else:\n",
    "        if options:\n",
    "            print 'Options for {}, {} - {}:'.format(name, state, fdid)\n",
    "            pprint(options)\n",
    "            misses.append((index, name, fdid, state, options))\n",
    "        else:\n",
    "            misses.append((index, name, fdid, state, None))\n",
    "            print 'Complete miss on {}, missing fdid : {}'.format(name, fdid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print 'Percent empty: {}'.format(float(len(misses)) / (len(hits) + len(misses)))\n",
    "display(hits)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pprint(misses)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Invalid FDID (either in FC or in the CSV):\n",
    "Emerald Isle Fire Department, NC - 16045 / 01645 (FC)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def to_risk(n):\n",
    "    if n == 1:\n",
    "        return 'Low'\n",
    "    elif n == 2 or n == 3:\n",
    "        return 'Medium'\n",
    "    elif n == 4:\n",
    "        return 'High'\n",
    "    else:\n",
    "        return 'Unknown'\n",
    "\n",
    "def explode_department(fd):\n",
    "    if fd.population_class is not None:\n",
    "        pop_range = FireDepartment.POPULATION_CLASSES[fd.population_class][1]\n",
    "    else:\n",
    "        pop_range = 'Unknown'\n",
    "        \n",
    "    return [fd.region,\n",
    "            pop_range,\n",
    "            fd.metrics.dist_model_score.all,\n",
    "            to_risk(fd.metrics.community_fire_risk.all),\n",
    "            to_risk(fd.metrics.community_fire_spread_risk.all),\n",
    "            to_risk(fd.metrics.community_death_and_injury_risk.all)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "exploded_hits = map(lambda x: (list(df.iloc[x[0]][['Name', 'STATE', 'FDID']]) + explode_department(x[1])), hits)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2 = pd.DataFrame(data=exploded_hits, columns=df.columns)\n",
    "display(df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_options(opt):\n",
    "    if opt and opt[4]:\n",
    "        return ','.join(map(lambda x: x[1], opt[4]))\n",
    "\n",
    "df_misses = map(lambda x: (x[1], x[3], x[2], 'Not found', 'Not found', 'Not found', 'Not found', 'Not found', 'Not found', get_options(x)), misses)\n",
    "\n",
    "df3 = pd.DataFrame(data=df_misses, columns=['Name', 'STATE', 'FDID', 'NFPA Region', 'Population Range', 'PERF_SCORE', 'FIRE_RISK', 'FIRE_SPREAD_RISK', 'DEAT_INJURY_RISK', 'FD_OPTIONS_IN_STATE'])\n",
    "df3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df4 = df2.append(df3)[['Name', 'STATE', 'FDID', 'NFPA Region', 'Population Range', 'PERF_SCORE', 'FIRE_RISK', 'FIRE_SPREAD_RISK', 'DEAT_INJURY_RISK', 'FD_OPTIONS_IN_STATE']]\n",
    "df4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df4.to_csv('/tmp/afg_populated.csv')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
